Jinwon Lee
  • Profile
  • Data Mining
  • R 기초 처음부터 끝까지
  • Data Visualization
  • Data Exploration
  • Spatial Information Analysis
  • OpenData Analysis
  • Big Data Analysis Engineer
  1. 데이터 시각화 의미와 기법
  • Study
    • Data Mining
    • R 기초 처음부터 끝까지
    • Data Visualization
    • Data Exploration
    • Spatial Information Analysis
    • OpenData Analysis
    • Big Data Analysis Engineer

On this page

  • Part 4 : 데이터 시각화 의미와 기법

데이터 시각화 의미와 기법

Exploration
Code
R
Author

Jinwon Lee

Published

October 5, 2022

Part 4 : 데이터 시각화 의미와 기법

employee <- read.csv('./data/employees_kr.csv')
head(employee)
##   num Incentive Nego Sex Year
## 1   1      12.1 TRUE   M 2005
## 2   2       8.9 TRUE   F 2006
## 3   3       8.8 TRUE   M 2007
## 4   4       7.1 TRUE   F 2008
## 5   5      10.2 TRUE   M 2009
## 6   6       7.0 TRUE   F 2005
sub2008 <- subset(employee, employee$Year == 2008)
head(sub2008)
##    num Incentive Nego Sex Year
## 4    4       7.1 TRUE   F 2008
## 9    9       8.2 TRUE   M 2008
## 14  14      13.4 TRUE   F 2008
## 19  19       8.4 TRUE   M 2008
## 24  24       8.0 TRUE   F 2008
## 29  29       6.8 TRUE   M 2008

hist(sub2008$Incentive)

sub2009 <- subset(employee, employee$Year ==2009)
hist(sub2009$Incentive)

subMan <- subset(employee, employee$Sex == 'M')
hist(subMan$Incentive)

subWoman <- subset(employee, employee$Sex == 'F')
hist(subWoman$Incentive)

subNego <- subset(employee, employee$Nego == "TRUE")
hist(subNego$Incentive)

subNoNego <- subset(employee, employee$Nego == "FALSE")
hist(subNoNego$Incentive)

split.screen(c(2, 1))
## [1] 1 2

screen(1)
plot(1:10)
screen(2)
plot(10:1)


close.screen(all = TRUE)
split.screen(c(2, 1))
## [1] 1 2

screen(1)
plot(1:10)
screen(2)
plot(10:1)

screen(2)
plot(1:10)


close.screen(all = TRUE)
split.screen(c(2, 3))
## [1] 1 2 3 4 5 6

screen(3)
plot(1:10)
screen(4)
plot(10:1)
screen(1)
plot(1:100)
screen(6)
plot(100:1)


close.screen(all = TRUE)
par(mfrow = c(2, 1))

plot(1:10)
plot(10:1)


close.screen(all = TRUE)
## [1] FALSE
x <- seq(1, 10, 0.1)
y <- exp(x)
plot(x, y) 

plot(x, y, main = 'Minho Graph',
     xlab = 'Time', ylab = 'Income increase')

abc <- c(260, 300, 250, 280, 310)
def <- c(180, 200, 210, 190, 170)
ghi <- c(210, 250, 260, 210, 270)
# Draw the first series (abc) as a red point-and-line chart.
# type = 'o' overplots points and connecting lines ('p' would draw points only).
# ylim fixes the y range to 0-400 so the other two series also fit.
# axes = FALSE and ann = FALSE suppress the default axes and axis titles;
# both are added by hand below.
plot(abc, type = 'o', col = 'red',
     ylim = c(0, 400), axes = FALSE, ann = FALSE)

# Custom x axis with one labelled tick per observation, default y axis.
# axis() has no ylim argument; the y range is already fixed by plot().
axis(1, at = 1:5, labels = c('A', 'B', 'C', 'D', 'E'))
axis(2)

title(main = 'Fruit', col.main = 'red', font.main = 4)
title(xlab = 'Day', col.lab = 'black')
title(ylab = 'Price', col.lab = 'blue')

# Add the two remaining series as dashed lines with their own point symbols.
lines(def, type = 'o', pch = 21, col = 'green', lty = 2)
lines(ghi, type = 'o', pch = 22, col = 'blue', lty = 2)

# Legend symbols and line types mirror the three series exactly as drawn:
# abc uses the defaults (pch 1, solid), def and ghi are dashed.
legend(4, 400, c('Orange', 'Apple', 'Banana'), cex = 0.8,
       col = c('red', 'green', 'blue'), pch = c(1, 21, 22), lty = c(1, 2, 2))

plot(10:1)
par(new = T)
plot(1:10)
par(new = T)
plot(c(6, 6, 6, 6, 6), type = 'o', col = 'red')

x <- c(50, 40, 32, 68, 120, 92)
barplot(x, names = 'Total Sale Amount')

X_matrix <- matrix(c(50, 40, 32, 68, 120, 92), 3, 2)
X_matrix
##      [,1] [,2]
## [1,]   50   68
## [2,]   40  120
## [3,]   32   92

split.screen(c(1, 2))
## [1] 1 2

screen(1)
barplot(X_matrix, names = c('Korea', 'America'))

screen(2)
barplot(X_matrix, names = c('Korea', 'America'), beside = T)

abc <- c(50, 40, 32, 68, 120, 92)

barplot(abc, main = 'abc', xlab = 'Season', ylab = 'Sales',
        names.arg = c('A', 'B', 'C', 'D', 'E', 'F'), border = 'blue',
        density = c(10, 30, 50, 80, 90, 92))

abc <- c(110, 300, 150, 280, 310)
def <- c(180, 200, 210, 190, 170)
ghi <- c(210, 150, 260, 210, 70)

B_Type2 <- matrix(c(abc, def, ghi), 5, 3)
B_Type2
##      [,1] [,2] [,3]
## [1,]  110  180  210
## [2,]  300  200  150
## [3,]  150  210  260
## [4,]  280  190  210
## [5,]  310  170   70

barplot(B_Type2, main = 'Ball Type별 시즌의 판매량', 
        xlab = 'Ball Type', ylab = '매출', beside = T,
        names.arg = c('BaseBall', 'SoccerBall', 'BeachBall'),
        border = 'blue', col = rainbow(5), ylim = (c(0, 400)))

legend(16, 400, c('A시즌', 'B시즌', 'C시즌', 'D시즌', 'E시즌'),
       cex = 0.8, fill = rainbow(5))

# Transposing B_Type2 puts the three ball types in rows, so every season
# (column) is drawn as a group of three side-by-side bars.
barplot(t(B_Type2), main = '시즌별 볼타입에 따른 판매량',
        xlab = 'Season', ylab = 'Price', beside = TRUE,
        names.arg = c('A', 'B', 'C', 'D', 'E'),
        border = 'blue', col = rainbow(3), ylim = c(0, 400))

# The legend must use the same 3-colour palette as the bars; rainbow(5)
# would show colours that do not correspond to any bar.
legend(16, 400, c('BaseBall', 'SoccerBall', 'BeachBall'),
       cex = 0.8, fill = rainbow(3))

barplot(t(B_Type2), main = '시즌별 볼타입에 따른 판매량(누적 표시형)',
        xlab = 'Season', ylab = '매출',
        names.arg = c('A', 'B', 'C', 'D', 'E'),
        border = 'blue', col = rainbow(3), ylim = (c(0, 1000)))

legend(4.5, 1000, c('BaseBall', 'SoccerBall', 'BeachBall'),
       cex = 0.8, fill = rainbow(3))

x
## [1]  50  40  32  68 120  92
dotchart(x, labels = c('A', 'B', 'C', 'D', 'E', 'F'), pch = 22)

x <- c(1, 2, 1, 4, 5, 4, 5, 2, 3, 5, 2, 6, 7, 3,
       7, 8, 6, 5, 4, 7, 7, 6, 5, 7, 8, 9, 8)
hist(x, xlim = c(0, 10), ylim = c(0, 6),
     nclass = 12, main = 'Call number of #1 Topic')

T_sales <- c(210, 110, 400, 550, 700, 130)  
pie(T_sales)

pie(T_sales, init.angle = 90, col = rainbow(length(T_sales)),
    labels = c('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'))

legend(1, 1, c('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'),
       cex = 0.8, fill = rainbow(length(T_sales)))

library(plotrix)

T_sales <- c(210, 110, 400, 550, 700, 130)
week <- c('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday')
ratio <- round(T_sales / sum(T_sales) * 100, 1)
label <- paste(week, '\n', ratio, '%')

pie3D(T_sales, main = '주간 매출 변동',
      col = rainbow(length(T_sales)), cex = 0.8, labels = label)

abc ; def; ghi
## [1] 110 300 150 280 310
## [1] 180 200 210 190 170
## [1] 210 150 260 210  70
boxplot(abc, def, ghi) 

boxplot(abc, def, ghi, col = c('yellow', 'cyan', 'green'),
        names = c('BaseBall', 'SoccerBall', 'BeachBall'),
        horizontal = TRUE)

plot(1:10)
par(new = T)
plot(10:1)


plot.new()
plot(1:100)

# Set up an empty -4..4 coordinate frame, then add two random point clouds.
# plot() starts its own frame, so a preceding plot.new() only produces an
# extra blank page. points() is a low-level call that always draws onto the
# current plot, so par(new = TRUE) is not needed before it; setting it there
# would stay active and make the next high-level plot draw over this one.
plot(-4:4, -4:4, type = 'n')
points(rnorm(200), rnorm(200), pch = '+', col = 'red')
points(rnorm(200), rnorm(200), pch = 'o', col = 'cyan')

x <- c(1:10)
y <- x * x
plot(x, y, type = 'n', main = 'Title')
for (i in 1:5) lines(x, (y + i * 5), col = i, lty = i)

x <- c(1, 3, 6, 8, 9)
y <- c(12, 56, 78, 32, 9)
plot(x, y)

segments(6, 78, 8, 32)
arrows(3, 56, 1, 12)
rect(4, 20, 6, 30, density = 3)

text(4, 40, '이것은 샘플입니다', srt = 55)
mtext('상단의 문자열입니다', side = 3)
mtext('우측의 문자열입니다', side = 4, adj = 0.3)
box(lty = 2, col = 'red')
axis(1, pos = 40, at = 0:10, col = 2)
axis(2, pos = 5, at = 10:60)

x <- c(1:10)
y <- exp(x)
plot(x, y, type = 'n', main = 'Title')
for (i in 1:10) lines(x, (y + i * 5), col = i, lty = i)

y <- x * x
plot(x, y, type = 'n', main = 'Title')
for (i in 1:10) lines(x, (y + i * 5), col = i, lty = i)

mtext('Right Side Text', side = 4, adj = 0.5)
abline(1:2)
box(lty = 2, col = 'red')
axis(1, pos = 50, at = 0:10, col = 2)
axis(2, pos = 6, at = 0:100, col = 3)

x <- c(1, 2, 1, 4, 5, 4, 5, 2, 3, 5, 2, 6, 7, 3, 7, 8, 6, 5, 4, 7, 7, 6, 5, 7, 8, 9, 8)
par(mfrow = c(1, 2))
hist(x)
hist(x, probability = T, main = 'Histogram with density line')
lines(density(x))

x1 = seq(-3, 3, length = 50)
x2 <- seq(-4, 4, length = 60)
# Quadratic surface z = x1^2 + x2^2 + x1 * x2.
# Works element-wise on vectors, so it can be handed directly to outer().
f <- function(x1, x2) {
    squares <- x1 ^ 2 + x2 ^ 2
    cross <- x1 * x2
    squares + cross
}
y = outer(x1, x2, FUN = f)
persp(x1, x2, y)

# install.packages('scatterplot3d')
#' Draw a 3-D scatter plot of three columns of a data frame, coloured by species.
#'
#' @param x,y,z Column indices of `data` used for the x, y and z axes; the
#'   matching column names become the axis labels.
#' @param data Data frame to plot. It must contain a `Species` column, which
#'   selects the colour of each point. Defaults to `iris`.
#' @return The value returned by `scatterplot3d()`.
plot3DfishData <- function(x, y, z, data = iris)
{
    # require() only returns FALSE when the package is missing, so check the
    # result and stop with a clear message instead of failing further down.
    if (!require('scatterplot3d')) {
        stop("package 'scatterplot3d' is not installed", call. = FALSE)
    }
    if (is.null(data$Species)) {
        stop("'data' must contain a 'Species' column", call. = FALSE)
    }
    fish.variable <- colnames(data)
    point.palette <- c('blue', 'black', 'red', 'green', 'turquoise')
    # Index the palette by the integer group code so that factor and
    # character Species columns are coloured the same way.
    species.code <- as.integer(as.factor(data$Species))
    scatterplot3d(
        data[, x],
        data[, y],
        data[, z],
        color = point.palette[species.code],
        pch = 19,
        xlab = fish.variable[x],
        ylab = fish.variable[y],
        zlab = fish.variable[z]
    )
}

par(mfrow = c(2, 2))
plot3DfishData(1, 2, 5)
## 필요한 패키지를 로딩중입니다: scatterplot3d
plot3DfishData(1, 2, 3)
plot3DfishData(3, 4, 5)
plot3DfishData(2, 3, 5) 

# install.packages('lattice')
library(lattice)
data(quakes)
head(quakes)
##      lat   long depth mag stations
## 1 -20.42 181.62   562 4.8       41
## 2 -20.62 181.03   650 4.2       15
## 3 -26.00 184.10    42 5.4       43
## 4 -17.97 181.66   626 4.1       19
## 5 -20.42 181.96   649 4.0       11
## 6 -19.68 184.31   195 4.0       12

str(quakes)
## 'data.frame':    1000 obs. of  5 variables:
##  $ lat     : num  -20.4 -20.6 -26 -18 -20.4 ...
##  $ long    : num  182 181 184 182 182 ...
##  $ depth   : int  562 650 42 626 649 195 82 194 211 622 ...
##  $ mag     : num  4.8 4.2 5.4 4.1 4 4 4.8 4.4 4.7 4.3 ...
##  $ stations: int  41 15 43 19 11 12 43 15 35 19 ...

mini <- min(quakes$depth)
maxi <- max(quakes$depth)
mini
## [1] 40
maxi
## [1] 680

r <- ceiling((maxi - mini) / 8)
inf <- seq(mini, maxi, r)
r
## [1] 80
inf
## [1]  40 120 200 280 360 440 520 600 680

quakes$depth.cat <- factor(floor((quakes$depth - mini) / r), labels = paste(inf, inf + r, sep = '-'))

xyplot(lat ~ long | depth.cat, data = quakes, main = 'EarthQuake Data')

# 3-D point cloud of magnitude over the latitude/longitude plane.
cloud(mag ~ lat * long, data = quakes, sub = 'Magnitude With Longitude and Latitude')

bwplot(mag ~ depth.cat, data = quakes, main = 'Depth and Strength Relationship')

cloud(Species ~ Sepal.Length + Petal.Length, data = iris, main = 'IRIS Data View')

splom(iris[, 1:4])

bwplot(Sepal.Length ~ Sepal.Width, data = iris)

# install.packages('ggplot2')  
library(ggplot2)

diamonds
## # A tibble: 53,940 × 10
##    carat cut       color clarity depth table price     x     y     z
##    <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
##  1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43
##  2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
##  3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31
##  4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
##  5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75
##  6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48
##  7  0.24 Very Good I     VVS1     62.3    57   336  3.95  3.98  2.47
##  8  0.26 Very Good H     SI1      61.9    55   337  4.07  4.11  2.53
##  9  0.22 Fair      E     VS2      65.1    61   337  3.87  3.78  2.49
## 10  0.23 Very Good H     VS1      59.4    61   338  4     4.05  2.39
## # ℹ 53,930 more rows
g <- diamonds[order(diamonds$table),]
head(g)
## # A tibble: 6 × 10
##   carat cut       color clarity depth table price     x     y     z
##   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  1.04 Ideal     I     VS1      62.9    43  4997  6.45  6.41  4.04
## 2  0.29 Very Good E     VS1      62.8    44   474  4.2   4.24  2.65
## 3  1    Fair      I     VS1      64      49  3951  6.43  6.39  4.1 
## 4  0.3  Fair      E     SI1      64.5    49   630  4.28  4.25  2.75
## 5  1.02 Fair      F     SI1      61.8    50  4227  6.59  6.51  4.05
## 6  2    Fair      H     SI1      61.2    50 13764  8.17  8.08  4.97
tail(g)
## # A tibble: 6 × 10
##   carat cut   color clarity depth table price     x     y     z
##   <dbl> <ord> <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.71 Fair  D     VS2      55.6    73  2368  6.01  5.96  3.33
## 2  0.5  Fair  E     VS2      79      73  2579  5.21  5.18  4.09
## 3  0.5  Fair  E     VS2      79      73  2579  5.21  5.18  4.09
## 4  0.79 Fair  G     SI1      65.3    76  2362  5.52  5.13  3.35
## 5  0.81 Fair  F     SI2      68.8    79  2301  5.26  5.2   3.58
## 6  2.01 Fair  F     SI1      58.6    95 13387  8.32  8.31  4.87
gg <- ggplot(diamonds, aes(x = carat, y = price))
gg + geom_point()

gg <- ggplot(diamonds, aes(x = carat, y = price))
gg + geom_point(size = 1, shape = 2, color = 'steelblue', stroke = 1)

gg <- ggplot(diamonds, aes(x = carat, y = price))
gg + geom_point(aes(size = carat, shape = cut, color = color, stroke = carat))
## Warning: Using shapes for an ordinal variable is not advised

gg1 <- gg + geom_point(aes(color = color))
gg2 <- gg1 + labs(title = 'Diamonds', x = 'Carat Layer', y = 'Price Layer')
print(gg2)

gg1 <- gg + geom_point(aes(color = color))
gg2 <- gg1 + labs(title = 'Diamonds', x = 'Carat', y = 'Price')
gg2 + theme(text = element_text(color = 'red'))

gg1 <- gg + geom_point(aes(color = color))
gg2 <- gg1 + labs(title = 'Diamonds', x = 'Carat', y = 'Price')
gg3 <- gg2 + theme(plot.title = element_text(size = 25),
                   axis.title.x = element_text(size = 20),
                   axis.title.y = element_text(size = 20),
                   axis.text.x = element_text(size = 15),
                   axis.text.y = element_text(size = 15))
print(gg3)

gg1 <- gg + geom_point(aes(color = color))
gg2 <- gg1 + labs(title = 'Diamonds', x = 'Carat', y = 'Price')
gg3 <- gg2 + theme(plot.title = element_text(size = 25),
                   axis.title.x = element_text(size = 20),
                   axis.title.y = element_text(size = 20),
                   axis.text.x = element_text(size = 15),
                   axis.text.y = element_text(size = 15))
print(gg3)


gg3 + labs(title = 'Plot Title \nSecond Line of Plot Title') +
    theme(plot.title = element_text(face = 'bold', color = 'steelblue', lineheight = 1.2))

gg1 <- gg + geom_point(aes(color = color))
gg2 <- gg1 + labs(title = 'Diamonds', x = 'Carat', y = 'Price')
gg3 <- gg2 + theme(plot.title = element_text(size = 25), 
                   axis.title.x = element_text(size = 20),
                   axis.title.y = element_text(size = 20),
                   axis.text.x = element_text(size = 15),
                   axis.text.y = element_text(size = 15))
print(gg3)


gg3 + scale_colour_manual(name = 'Legend', 
                          values = c(
                              'D' = 'grey',
                              'E' = 'red',
                              'F' = 'blue',
                              'G' = 'yellow',
                              'H' = 'black',
                              'I' = 'green',
                              'J' = 'firebrick'))

gg1 <- gg + geom_point(aes(color = color))
gg2 <- gg1 + labs(title = 'Diamonds', x = 'Carat', y = 'Price')
gg3 <- gg2 + theme(plot.title = element_text(size = 25), 
                   axis.title.x = element_text(size = 20), 
                   axis.title.y = element_text(size = 20), 
                   axis.text.x = element_text(size = 15), 
                   axis.text.y = element_text(size = 15))
print(gg3)

gg3 + coord_cartesian(xlim = c(0, 3), ylim = c(0, 5000)) + geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

gg1 <- gg + geom_point(aes(color = color))
gg2 <- gg1 + labs(title = 'Diamonds', x = 'Carat', y = 'Price')
gg3 <- gg2 + theme(plot.title = element_text(size = 25), 
                   axis.title.x = element_text(size = 20), 
                   axis.title.y = element_text(size = 20), 
                   axis.text.x = element_text(size = 15), 
                   axis.text.y = element_text(size = 15))
print(gg3)


gg3 + coord_flip()

gg1 <- gg + geom_point(aes(color = color))
gg2 <- gg1 + labs(title = 'Diamonds', x = 'Carat', y = 'Price')
gg3 <- gg2 + theme(plot.title = element_text(size = 25), 
                   axis.title.x = element_text(size = 20), 
                   axis.title.y = element_text(size = 20), 
                   axis.text.x = element_text(size = 15), 
                   axis.text.y = element_text(size = 15))
print(gg3)


gg3 + theme(plot.background = element_rect(fill = 'yellowgreen'),
            plot.margin = unit(c(2, 4, 1, 3), 'cm'))

gg1 <- gg + geom_point(aes(color = color))
gg2 <- gg1 + labs(title = 'Diamonds', x = 'Carat', y = 'Price')
gg3 <- gg2 + theme(plot.title = element_text(size = 25), 
                   axis.title.x = element_text(size = 20), 
                   axis.title.y = element_text(size = 20), 
                   axis.text.x = element_text(size = 15), 
                   axis.text.y = element_text(size = 15))
print(gg3)


# Overlay a dotted blue reference line at price = 5000. Line thickness is set
# with `linewidth`; using `size` for lines is deprecated since ggplot2 3.4.0.
p1 <- gg3 + geom_hline(yintercept = 5000, linewidth = 2, linetype = 'dotted', color = 'blue')
print(p1)

options(scipen = 999)  # turn-off scientific notation like 1e+48
library(ggplot2)
theme_set(theme_bw())  # pre-set the bw theme.
data('midwest', package = 'ggplot2')

# Scatterplot
gg <- ggplot(midwest, aes(x = area, y = poptotal)) +
    geom_point(aes(col = state, size = popdensity)) +
    geom_smooth(method = 'loess', se = F) +
    xlim(c(0, 0.1)) +
    ylim(c(0, 500000)) +
    labs(subtitle = 'Area Vs Population',
         y = 'Population',
         x = 'Area',
         title = 'Scatterplot',
         caption = 'Source: midwest')
plot(gg)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 15 rows containing missing values (`geom_point()`).

library(ggplot2)
data(mpg, package = 'ggplot2') # alternate source: 'http://goo.gl/uEeRGu')
theme_set(theme_bw())  # pre-set the bw theme.

g <- ggplot(mpg, aes(cty, hwy))

# Draw the scatterplot with a fitted linear trend line (no confidence band).
# Select the whole expression below and run it as one batch.
g + geom_point() +
    geom_smooth(method = 'lm', se = FALSE) +
    labs(subtitle = 'mpg: city vs highway mileage',
         y = 'hwy',
         x = 'cty',
         title = 'Scatterplot with overlapping points',
         caption = 'Source: mpg')  # the data plotted here is mpg, not midwest
## `geom_smooth()` using formula = 'y ~ x'

library(ggplot2)
data(mpg, package = 'ggplot2')

# Scatterplot
theme_set(theme_bw()) #pre-set the bw theme.
g <- ggplot(mpg, aes(cty, hwy))
g + geom_count(col = 'tomato3', show.legend = F) +
    labs(subtitle = 'mpg: city vs highway mileage',
         y = 'hwy',
         x = 'cty',
         title = 'Counts Plot')

library(ggplot2)
# install.packages('ggExtra')
library(ggExtra)
data(mpg, package = 'ggplot2')

# Scatterplot
theme_set(theme_bw())
# pre-set the bw theme.
mpg_select <- mpg[mpg$hwy >= 35 & mpg$cty > 27,]
g <- ggplot(mpg, aes(cty, hwy)) +
    geom_count() +
    geom_smooth(method = 'lm', se = F)

ggMarginal(g, type = 'histogram', fill = 'transparent')
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
ggMarginal(g, type = 'boxplot', fill = 'transparent')
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?
## Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?

library(ggplot2)
# install.packages('ggcorrplot')
library(ggcorrplot)
# Correlation matrix
data(mtcars)
corr <- round(cor(mtcars), 1)

# Plot
ggcorrplot(corr,
           hc.order = TRUE,
           type = 'lower',
           lab = TRUE,
           lab_size = 3,
           method = 'circle',
           colors = c('tomato2', 'white', 'springgreen3'),
           title = 'Correlogram of mtcars',
           ggtheme = theme_bw)

library(ggplot2)

theme_set(theme_bw())

data('mtcars') # Load the built-in mtcars data set.
mtcars$`car name` <- rownames(mtcars) # Create a column holding the car names.

# Standardise mpg to z-scores (subtract the mean, divide by the standard
# deviation), rounded to two decimals.
mtcars$mpg_z <- round((mtcars$mpg - mean(mtcars$mpg)) / sd(mtcars$mpg), 2)

# Label each car 'below' when its z-score is negative and 'above' otherwise.
mtcars$mpg_type <- ifelse(mtcars$mpg_z < 0, 'below', 'above')
mtcars <- mtcars[order(mtcars$mpg_z), ] # Sort the rows by ascending z-score.
# Use the sorted car names as the factor levels, so the level order follows
# the sort order established above.
mtcars$'car name' <- factor(mtcars$'car name', levels = mtcars$'car name')

ggplot(mtcars, aes(x = `car name`, y = mpg_z, label = mpg_z)) +
    geom_bar(stat = 'identity', aes(fill = mpg_type), width = .5) +
    scale_fill_manual(name = 'Mileage',
                      labels = c('Above Average', 'Below Average'),
                      values = c('above' = '#00ba38', 'below' = '#f8766d')) +
    labs(subtitle = 'Normalised mileage from "mtcars"',
         title = 'Diverging Bars') +
    coord_flip()

library(ggplot2)
theme_set(theme_bw())

# plot
g <- ggplot(mpg, aes(class, cty))
g + geom_violin() +
    labs(title = 'Violin plot',
         subtitle = 'City Mileage vs Class of vehicle',
         caption = 'Source: mpg',
         x = 'Class of Vehicle',
         y = 'City Mileage')

# install.packages('ggplot2')  
library(ggplot2)            
head(iris) 
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# qplot() is deprecated since ggplot2 3.4.0; the calls below are the
# equivalent ggplot() + geom_*() forms.

# Basic scatterplot of petal length against sepal length.
ggplot(iris, aes(Sepal.Length, Petal.Length)) +
    geom_point()

# Same scatterplot, coloured by species and sized by petal width.
ggplot(iris, aes(Sepal.Length, Petal.Length, color = Species, size = Petal.Width)) +
    geom_point()

# One line per species.
ggplot(iris, aes(Sepal.Length, Petal.Length, color = Species)) +
    geom_line()

# Growth curve of every orange tree.
ggplot(Orange, aes(age, circumference, colour = Tree)) +
    geom_line() +
    ggtitle('How does orange tree circumference vary with age?')

Titanic
## , , Age = Child, Survived = No
## 
##       Sex
## Class  Male Female
##   1st     0      0
##   2nd     0      0
##   3rd    35     17
##   Crew    0      0
## 
## , , Age = Adult, Survived = No
## 
##       Sex
## Class  Male Female
##   1st   118      4
##   2nd   154     13
##   3rd   387     89
##   Crew  670      3
## 
## , , Age = Child, Survived = Yes
## 
##       Sex
## Class  Male Female
##   1st     5      1
##   2nd    11     13
##   3rd    13     14
##   Crew    0      0
## 
## , , Age = Adult, Survived = Yes
## 
##       Sex
## Class  Male Female
##   1st    57    140
##   2nd    14     80
##   3rd    75     76
##   Crew  192     20
str(Titanic)
##  'table' num [1:4, 1:2, 1:2, 1:2] 0 0 35 0 0 0 17 0 118 154 ...
##  - attr(*, "dimnames")=List of 4
##   ..$ Class   : chr [1:4] "1st" "2nd" "3rd" "Crew"
##   ..$ Sex     : chr [1:2] "Male" "Female"
##   ..$ Age     : chr [1:2] "Child" "Adult"
##   ..$ Survived: chr [1:2] "No" "Yes"
mosaicplot(Titanic, main = 'Titanic Data, Class,Sex,Age,Survival', col = TRUE)

Source Code
---
title: '데이터 시각화 의미와 기법'
author: 'Jinwon Lee'
date: '2022-10-05'
categories: [Exploration, Code, R]
page-layout: full
output:
  prettydoc::html_pretty:
    theme: architect
    highlight: github
editor_options: 
  chunk_output_type: console
mainfont: NanumGothic
---

# Part 4 : 데이터 시각화 의미와 기법

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE,
                      collapse = TRUE,
                      comment = '##')
getwd()
```

```{r 4-01}
employee <- read.csv('./data/employees_kr.csv')
head(employee)
```

```{r 4-02}
sub2008 <- subset(employee, employee$Year == 2008)
head(sub2008)

hist(sub2008$Incentive)
sub2009 <- subset(employee, employee$Year ==2009)
hist(sub2009$Incentive)
```

```{r 4-03}
subMan <- subset(employee, employee$Sex == 'M')
hist(subMan$Incentive)
subWoman <- subset(employee, employee$Sex == 'F')
hist(subWoman$Incentive)
```

```{r 4-04}
# Split the employees by whether they negotiated. as.logical() makes the test
# explicit and gives the same rows whether read.csv() parsed Nego as a logical
# column or left it as the strings "TRUE"/"FALSE"; subset() drops NA rows.
subNego <- subset(employee, as.logical(Nego))
hist(subNego$Incentive)
subNoNego <- subset(employee, !as.logical(Nego))
hist(subNoNego$Incentive)
```

```{r 4-05}
split.screen(c(2, 1))

screen(1)
plot(1:10)
screen(2)
plot(10:1)

close.screen(all = TRUE)
```

```{r 4-06}
split.screen(c(2, 1))

screen(1)
plot(1:10)
screen(2)
plot(10:1)

screen(2)
plot(1:10)

close.screen(all = TRUE)
```

```{r 4-07}
split.screen(c(2, 3))

screen(3)
plot(1:10)
screen(4)
plot(10:1)
screen(1)
plot(1:100)
screen(6)
plot(100:1)

close.screen(all = TRUE)
```

```{r 4-08}
# Stack two plots vertically with par(mfrow); par() returns the previous
# settings, which are kept so the layout can be restored afterwards.
old_par <- par(mfrow = c(2, 1))

plot(1:10)
plot(10:1)

# close.screen() only applies to split.screen() layouts (it just returns
# FALSE here); restoring par() is what resets an mfrow layout.
par(old_par)
```

```{r 4-09}
x <- seq(1, 10, 0.1)
y <- exp(x)
plot(x, y) 
```

```{r 4-10}
plot(x, y, main = 'Minho Graph',
     xlab = 'Time', ylab = 'Income increase')
```

```{r 4-11}
abc <- c(260, 300, 250, 280, 310)
def <- c(180, 200, 210, 190, 170)
ghi <- c(210, 250, 260, 210, 270)
```

```{r 4-12-13-14-15-16}
# Draw the first series (abc) as a red point-and-line chart.
# type = 'o' overplots points and connecting lines ('p' would draw points only).
# ylim fixes the y range to 0-400 so the other two series also fit.
# axes = FALSE and ann = FALSE suppress the default axes and axis titles;
# both are added by hand below.
plot(abc, type = 'o', col = 'red',
     ylim = c(0, 400), axes = FALSE, ann = FALSE)

# Custom x axis with one labelled tick per observation, default y axis.
# axis() has no ylim argument; the y range is already fixed by plot().
axis(1, at = 1:5, labels = c('A', 'B', 'C', 'D', 'E'))
axis(2)

title(main = 'Fruit', col.main = 'red', font.main = 4)
title(xlab = 'Day', col.lab = 'black')
title(ylab = 'Price', col.lab = 'blue')

# Add the two remaining series as dashed lines with their own point symbols.
lines(def, type = 'o', pch = 21, col = 'green', lty = 2)
lines(ghi, type = 'o', pch = 22, col = 'blue', lty = 2)

# Legend symbols and line types mirror the three series exactly as drawn:
# abc uses the defaults (pch 1, solid), def and ghi are dashed.
legend(4, 400, c('Orange', 'Apple', 'Banana'), cex = 0.8,
       col = c('red', 'green', 'blue'), pch = c(1, 21, 22), lty = c(1, 2, 2))
```

```{r 4-17}
plot(10:1)
par(new = T)
plot(1:10)
par(new = T)
plot(c(6, 6, 6, 6, 6), type = 'o', col = 'red')
```

```{r 4-18}
x <- c(50, 40, 32, 68, 120, 92)
barplot(x, names = 'Total Sale Amount')
```

```{r 4-19}
# 3 x 2 matrix: one column per country, one row per stacked/grouped segment.
X_matrix <- matrix(c(50, 40, 32, 68, 120, 92), 3, 2)
X_matrix

# Show the stacked and the grouped version of the same bar chart side by side.
split.screen(c(1, 2))

screen(1)
barplot(X_matrix, names.arg = c('Korea', 'America'))

screen(2)
barplot(X_matrix, names.arg = c('Korea', 'America'), beside = TRUE)

# Leave split-screen mode so that later plots use the whole device again.
close.screen(all.screens = TRUE)
```

```{r 4-20}
abc <- c(50, 40, 32, 68, 120, 92)

barplot(abc, main = 'abc', xlab = 'Season', ylab = 'Sales',
        names.arg = c('A', 'B', 'C', 'D', 'E', 'F'), border = 'blue',
        density = c(10, 30, 50, 80, 90, 92))
```

```{r 4-21}
abc <- c(110, 300, 150, 280, 310)
def <- c(180, 200, 210, 190, 170)
ghi <- c(210, 150, 260, 210, 70)

B_Type2 <- matrix(c(abc, def, ghi), 5, 3)
B_Type2

barplot(B_Type2, main = 'Ball Type별 시즌의 판매량', 
        xlab = 'Ball Type', ylab = '매출', beside = T,
        names.arg = c('BaseBall', 'SoccerBall', 'BeachBall'),
        border = 'blue', col = rainbow(5), ylim = (c(0, 400)))

legend(16, 400, c('A시즌', 'B시즌', 'C시즌', 'D시즌', 'E시즌'),
       cex = 0.8, fill = rainbow(5))
```

```{r 4-22}
# Transposing B_Type2 puts the three ball types in rows, so every season
# (column) is drawn as a group of three side-by-side bars.
barplot(t(B_Type2), main = '시즌별 볼타입에 따른 판매량',
        xlab = 'Season', ylab = 'Price', beside = TRUE,
        names.arg = c('A', 'B', 'C', 'D', 'E'),
        border = 'blue', col = rainbow(3), ylim = c(0, 400))

# The legend must use the same 3-colour palette as the bars; rainbow(5)
# would show colours that do not correspond to any bar.
legend(16, 400, c('BaseBall', 'SoccerBall', 'BeachBall'),
       cex = 0.8, fill = rainbow(3))
```

```{r 4-23}
barplot(t(B_Type2), main = '시즌별 볼타입에 따른 판매량(누적 표시형)',
        xlab = 'Season', ylab = '매출',
        names.arg = c('A', 'B', 'C', 'D', 'E'),
        border = 'blue', col = rainbow(3), ylim = (c(0, 1000)))

legend(4.5, 1000, c('BaseBall', 'SoccerBall', 'BeachBall'),
       cex = 0.8, fill = rainbow(3))
```

```{r 4-24}
x
dotchart(x, labels = c('A', 'B', 'C', 'D', 'E', 'F'), pch = 22)
```

```{r 4-25}
x <- c(1, 2, 1, 4, 5, 4, 5, 2, 3, 5, 2, 6, 7, 3,
       7, 8, 6, 5, 4, 7, 7, 6, 5, 7, 8, 9, 8)
hist(x, xlim = c(0, 10), ylim = c(0, 6),
     nclass = 12, main = 'Call number of #1 Topic')
```

```{r 4-26}
T_sales <- c(210, 110, 400, 550, 700, 130)  
pie(T_sales)
```

```{r 4-27}
pie(T_sales, init.angle = 90, col = rainbow(length(T_sales)),
    labels = c('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'))

legend(1, 1, c('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'),
       cex = 0.8, fill = rainbow(length(T_sales)))
```

```{r 4-28}  
library(plotrix)

T_sales <- c(210, 110, 400, 550, 700, 130)
week <- c('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday')
ratio <- round(T_sales / sum(T_sales) * 100, 1)
label <- paste(week, '\n', ratio, '%')

pie3D(T_sales, main = '주간 매출 변동',
      col = rainbow(length(T_sales)), cex = 0.8, labels = label)
```

```{r 4-29}
abc ; def; ghi
boxplot(abc, def, ghi) 
```

```{r 4-30}
boxplot(abc, def, ghi, col = c('yellow', 'cyan', 'green'),
        names = c('BaseBall', 'SoccerBall', 'BeachBall'),
        horizontal = TRUE)
```

```{r 4-31}
plot(1:10)
par(new = T)
plot(10:1)

plot.new()
plot(1:100)
```

```{r 4-32}
# Set up an empty -4..4 coordinate frame, then add two random point clouds.
# plot() starts its own frame, so a preceding plot.new() only produces an
# extra blank page. points() is a low-level call that always draws onto the
# current plot, so par(new = TRUE) is not needed before it; setting it there
# would stay active and make the next high-level plot draw over this one.
plot(-4:4, -4:4, type = 'n')
points(rnorm(200), rnorm(200), pch = '+', col = 'red')
points(rnorm(200), rnorm(200), pch = 'o', col = 'cyan')
```

```{r 4-33}
x <- c(1:10)
y <- x * x
plot(x, y, type = 'n', main = 'Title')
for (i in 1:5) lines(x, (y + i * 5), col = i, lty = i)
```

```{r 4-34-35}
x <- c(1, 3, 6, 8, 9)
y <- c(12, 56, 78, 32, 9)
plot(x, y)

segments(6, 78, 8, 32)
arrows(3, 56, 1, 12)
rect(4, 20, 6, 30, density = 3)

text(4, 40, '이것은 샘플입니다', srt = 55)
mtext('상단의 문자열입니다', side = 3)
mtext('우측의 문자열입니다', side = 4, adj = 0.3)
box(lty = 2, col = 'red')
axis(1, pos = 40, at = 0:10, col = 2)
axis(2, pos = 5, at = 10:60)
```

```{r 4-36}
x <- c(1:10)
y <- exp(x)
plot(x, y, type = 'n', main = 'Title')
for (i in 1:10) lines(x, (y + i * 5), col = i, lty = i)
y <- x * x
plot(x, y, type = 'n', main = 'Title')
for (i in 1:10) lines(x, (y + i * 5), col = i, lty = i)

mtext('Right Side Text', side = 4, adj = 0.5)
abline(1:2)
box(lty = 2, col = 'red')
axis(1, pos = 50, at = 0:10, col = 2)
axis(2, pos = 6, at = 0:100, col = 3)
```

```{r 4-37}
x <- c(1, 2, 1, 4, 5, 4, 5, 2, 3, 5, 2, 6, 7, 3, 7, 8, 6, 5, 4, 7, 7, 6, 5, 7, 8, 9, 8)
par(mfrow = c(1, 2))
hist(x)
hist(x, probability = T, main = 'Histogram with density line')
lines(density(x))
```

```{r 4-38}
x1 = seq(-3, 3, length = 50)
x2 <- seq(-4, 4, length = 60)
# Quadratic surface z = x1^2 + x2^2 + x1 * x2.
# Works element-wise on vectors, so it can be handed directly to outer().
f <- function(x1, x2) {
    squares <- x1 ^ 2 + x2 ^ 2
    cross <- x1 * x2
    squares + cross
}
y = outer(x1, x2, FUN = f)
persp(x1, x2, y)
```

```{r 4-39}
# install.packages('scatterplot3d')
#' Draw a 3-D scatter plot of three columns of a data frame, coloured by species.
#'
#' @param x,y,z Column indices of `data` used for the x, y and z axes; the
#'   matching column names become the axis labels.
#' @param data Data frame to plot. It must contain a `Species` column, which
#'   selects the colour of each point. Defaults to `iris`.
#' @return The value returned by `scatterplot3d()`.
plot3DfishData <- function(x, y, z, data = iris)
{
    # require() only returns FALSE when the package is missing, so check the
    # result and stop with a clear message instead of failing further down.
    if (!require('scatterplot3d')) {
        stop("package 'scatterplot3d' is not installed", call. = FALSE)
    }
    if (is.null(data$Species)) {
        stop("'data' must contain a 'Species' column", call. = FALSE)
    }
    fish.variable <- colnames(data)
    point.palette <- c('blue', 'black', 'red', 'green', 'turquoise')
    # Index the palette by the integer group code so that factor and
    # character Species columns are coloured the same way.
    species.code <- as.integer(as.factor(data$Species))
    scatterplot3d(
        data[, x],
        data[, y],
        data[, z],
        color = point.palette[species.code],
        pch = 19,
        xlab = fish.variable[x],
        ylab = fish.variable[y],
        zlab = fish.variable[z]
    )
}

par(mfrow = c(2, 2))
plot3DfishData(1, 2, 5)
plot3DfishData(1, 2, 3)
plot3DfishData(3, 4, 5)
plot3DfishData(2, 3, 5) 
```

```{r 4-40}
# install.packages('lattice')
library(lattice)
data(quakes)
head(quakes)

str(quakes)

mini <- min(quakes$depth)
maxi <- max(quakes$depth)
mini
maxi

r <- ceiling((maxi - mini) / 8)
inf <- seq(mini, maxi, r)
r
inf

# Assign every quake to a depth band of width r; band k (counting from 0)
# starts at inf[k + 1]. Listing the levels explicitly keeps each label tied
# to its band even if some band contains no observations; without `levels`,
# factor() would need exactly as many distinct values as labels.
quakes$depth.cat <- factor(floor((quakes$depth - mini) / r),
                           levels = seq_along(inf) - 1,
                           labels = paste(inf, inf + r, sep = '-'))

xyplot(lat ~ long | depth.cat, data = quakes, main = 'EarthQuake Data')
```

```{r 4-41}
# 3-D point cloud of magnitude over the latitude/longitude plane.
cloud(mag ~ lat * long, data = quakes, sub = 'Magnitude With Longitude and Latitude')
```

```{r 4-42}
bwplot(mag ~ depth.cat, data = quakes, main = 'Depth and Strength Relationship')
```

```{r 4-43}
cloud(Species ~ Sepal.Length + Petal.Length, data = iris, main = 'IRIS Data View')
splom(iris[, 1:4])
bwplot(Sepal.Length ~ Sepal.Width, data = iris)
```

```{r 4-44}
# install.packages('ggplot2')  
library(ggplot2)

diamonds
g <- diamonds[order(diamonds$table),]
head(g)
tail(g)
```

```{r 4-45}
gg <- ggplot(diamonds, aes(x = carat, y = price))
gg + geom_point()
```

```{r 4-46}
gg <- ggplot(diamonds, aes(x = carat, y = price))
gg + geom_point(size = 1, shape = 2, color = 'steelblue', stroke = 1)
```

```{r 4-47}
gg <- ggplot(diamonds, aes(x = carat, y = price))
gg + geom_point(aes(size = carat, shape = cut, color = color, stroke = carat))
```

```{r 4-48}
gg1 <- gg + geom_point(aes(color = color))
gg2 <- gg1 + labs(title = 'Diamonds', x = 'Carat Layer', y = 'Price Layer')
print(gg2)
```

```{r 4-49}
gg1 <- gg + geom_point(aes(color = color))
gg2 <- gg1 + labs(title = 'Diamonds', x = 'Carat', y = 'Price')
gg2 + theme(text = element_text(color = 'red'))
```

```{r 4-50}
gg1 <- gg + geom_point(aes(color = color))
gg2 <- gg1 + labs(title = 'Diamonds', x = 'Carat', y = 'Price')
gg3 <- gg2 + theme(plot.title = element_text(size = 25),
                   axis.title.x = element_text(size = 20),
                   axis.title.y = element_text(size = 20),
                   axis.text.x = element_text(size = 15),
                   axis.text.y = element_text(size = 15))
print(gg3)
```

```{r 4-51}
# Coloured scatter plot with labels and enlarged text.
gg1 <- gg + geom_point(mapping = aes(colour = color))
gg2 <- gg1 +
  labs(title = "Diamonds", x = "Carat", y = "Price")
gg3 <- gg2 +
  theme(
    plot.title = element_text(size = 25),
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15)
  )
print(gg3)

# Replace the title with a two-line one ("\n" breaks the line) and restyle it.
gg3 +
  labs(title = "Plot Title \nSecond Line of Plot Title") +
  theme(
    plot.title = element_text(
      face = "bold",
      color = "steelblue",
      lineheight = 1.2
    )
  )
```

```{r 4-52}
# Coloured scatter plot with labels and enlarged text.
gg1 <- gg + geom_point(mapping = aes(colour = color))
gg2 <- gg1 +
  labs(title = "Diamonds", x = "Carat", y = "Price")
gg3 <- gg2 +
  theme(
    plot.title = element_text(size = 25),
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15)
  )
print(gg3)

# Assign an explicit colour to each value of the mapped `color` column and
# rename the legend.
gg3 +
  scale_colour_manual(
    name = "Legend",
    values = c(
      "D" = "grey",
      "E" = "red",
      "F" = "blue",
      "G" = "yellow",
      "H" = "black",
      "I" = "green",
      "J" = "firebrick"
    )
  )
```

```{r 4-53}
# Coloured scatter plot with labels and enlarged text.
gg1 <- gg + geom_point(mapping = aes(colour = color))
gg2 <- gg1 +
  labs(title = "Diamonds", x = "Carat", y = "Price")
gg3 <- gg2 +
  theme(
    plot.title = element_text(size = 25),
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15)
  )
print(gg3)
# Restrict the visible x/y ranges and add a smoothing layer.
gg3 +
  coord_cartesian(xlim = c(0, 3), ylim = c(0, 5000)) +
  geom_smooth()
```

```{r 4-54}
# Coloured scatter plot with labels and enlarged text.
gg1 <- gg + geom_point(mapping = aes(colour = color))
gg2 <- gg1 +
  labs(title = "Diamonds", x = "Carat", y = "Price")
gg3 <- gg2 +
  theme(
    plot.title = element_text(size = 25),
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15)
  )
print(gg3)

# Same plot with the x and y axes swapped.
gg3 + coord_flip()
```

```{r 4-55}
# Coloured scatter plot with labels and enlarged text.
gg1 <- gg + geom_point(mapping = aes(colour = color))
gg2 <- gg1 +
  labs(title = "Diamonds", x = "Carat", y = "Price")
gg3 <- gg2 +
  theme(
    plot.title = element_text(size = 25),
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15)
  )
print(gg3)

# Fill the plot background and set the four plot margins, given in cm.
gg3 +
  theme(
    plot.background = element_rect(fill = "yellowgreen"),
    plot.margin = unit(c(2, 4, 1, 3), units = "cm")
  )
```

```{r 4-56}
# Coloured scatter plot with labels and enlarged text.
gg1 <- gg + geom_point(mapping = aes(colour = color))
gg2 <- gg1 +
  labs(title = "Diamonds", x = "Carat", y = "Price")
gg3 <- gg2 +
  theme(
    plot.title = element_text(size = 25),
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15)
  )
print(gg3)

# Overlay a dotted blue horizontal reference line at price = 5000.
# NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for line
# thickness - confirm the installed version before switching.
p1 <- gg3 +
  geom_hline(
    yintercept = 5000,
    linetype = "dotted",
    color = "blue",
    size = 2
  )
print(p1)
```

```{r 4-57}
options(scipen = 999)  # avoid scientific notation such as 1e+48
library(ggplot2)
theme_set(theme_bw())  # make the black-and-white theme the default
data("midwest", package = "ggplot2")

# Scatterplot of area against total population: points coloured by state and
# sized by population density, with a loess smoother (no confidence band).
# xlim()/ylim() restrict the axes to the stated ranges.
gg <- ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state, size = popdensity)) +
  geom_smooth(method = "loess", se = FALSE) +  # FALSE, not the reassignable F
  xlim(c(0, 0.1)) +
  ylim(c(0, 500000)) +
  labs(
    subtitle = "Area Vs Population",
    y = "Population",
    x = "Area",
    title = "Scatterplot",
    caption = "Source: midwest"
  )
plot(gg)
```

```{r 4-58}
library(ggplot2)
data(mpg, package = "ggplot2")  # alternate source: 'http://goo.gl/uEeRGu'
theme_set(theme_bw())  # make the black-and-white theme the default

g <- ggplot(mpg, aes(cty, hwy))

# Draw the scatterplot with a linear fit (no confidence band).
# Run the whole expression below as a single batch.
g + geom_point() +
  geom_smooth(method = "lm", se = FALSE) +  # FALSE, not the reassignable F
  labs(
    subtitle = "mpg: city vs highway mileage",
    y = "hwy",
    x = "cty",
    title = "Scatterplot with overlapping points",
    caption = "Source: mpg"  # the data plotted here is mpg, not midwest
  )
```

```{r 4-59}
library(ggplot2)
data(mpg, package = "ggplot2")

# Counts plot: geom_count() sizes each point by the number of observations
# at that (cty, hwy) location.
theme_set(theme_bw())  # make the black-and-white theme the default
g <- ggplot(mpg, aes(cty, hwy))
g + geom_count(col = "tomato3", show.legend = FALSE) +  # FALSE, not F
  labs(
    subtitle = "mpg: city vs highway mileage",
    y = "hwy",
    x = "cty",
    title = "Counts Plot"
  )
```

```{r 4-60}
library(ggplot2)
# install.packages("ggExtra")
library(ggExtra)
data(mpg, package = "ggplot2")

theme_set(theme_bw())  # make the black-and-white theme the default

# Rows with hwy >= 35 and cty > 27. Not referenced by the plots in this chunk;
# kept so that any code relying on `mpg_select` keeps working.
mpg_select <- mpg[mpg$hwy >= 35 & mpg$cty > 27, ]

# Counts plot with a linear fit (no confidence band).
g <- ggplot(mpg, aes(cty, hwy)) +
  geom_count() +
  geom_smooth(method = "lm", se = FALSE)  # FALSE, not the reassignable F

# Same plot twice, with marginal histograms and then marginal box plots.
ggMarginal(g, type = "histogram", fill = "transparent")
ggMarginal(g, type = "boxplot", fill = "transparent")
```

```{r 4-61}
library(ggplot2)
# install.packages("ggcorrplot")
library(ggcorrplot)

# Correlation matrix of mtcars, rounded to one decimal place.
data(mtcars)
corr <- round(cor(mtcars), digits = 1)

# Correlogram: lower triangle only, circles with the coefficient as a label.
ggcorrplot(
  corr,
  hc.order = TRUE,
  type = "lower",
  lab = TRUE,
  lab_size = 3,
  method = "circle",
  colors = c("tomato2", "white", "springgreen3"),
  title = "Correlogram of mtcars",
  ggtheme = theme_bw
)
```

```{r 4-62}
library(ggplot2)

theme_set(theme_bw())

data("mtcars")  # load the dataset
mtcars[["car name"]] <- rownames(mtcars)  # column holding the car names

# Standardised mpg (z-score), rounded to two decimals.
mtcars[["mpg_z"]] <- round(
  (mtcars[["mpg"]] - mean(mtcars[["mpg"]])) / sd(mtcars[["mpg"]]),
  2
)

# Flag each car as below or above the average, then sort by z-score.
mtcars[["mpg_type"]] <- ifelse(mtcars[["mpg_z"]] < 0, "below", "above")
mtcars <- mtcars[order(mtcars[["mpg_z"]]), ]
# Fix the factor levels to the sorted order so the bars keep that order.
mtcars[["car name"]] <- factor(
  mtcars[["car name"]],
  levels = mtcars[["car name"]]
)

# Diverging horizontal bars, filled by below/above average.
ggplot(mtcars, aes(x = `car name`, y = mpg_z, label = mpg_z)) +
  geom_bar(stat = "identity", aes(fill = mpg_type), width = .5) +
  scale_fill_manual(
    name = "Mileage",
    labels = c("Above Average", "Below Average"),
    values = c("above" = "#00ba38", "below" = "#f8766d")
  ) +
  labs(
    subtitle = 'Normalised mileage from "mtcars"',
    title = "Diverging Bars"
  ) +
  coord_flip()
```

```{r 4-63}
library(ggplot2)
theme_set(theme_bw())

# Violin plot of city mileage for each vehicle class.
g <- ggplot(mpg, aes(class, cty))
g + geom_violin() +
  labs(
    title = "Violin plot",
    subtitle = "City Mileage vs Class of vehicle",
    caption = "Source: mpg",
    x = "Class of Vehicle",
    y = "City Mileage"
  )
```

```{r 4-64}
# install.packages("ggplot2")
library(ggplot2)
# Preview the first rows of iris.
head(iris)
```

```{r 4-65}
# Quick scatter plot of petal length against sepal length.
qplot(
  x = Sepal.Length,
  y = Petal.Length,
  data = iris
)
```

```{r 4-66}
# Same scatter plot, coloured by species and sized by petal width.
qplot(
  x = Sepal.Length,
  y = Petal.Length,
  data = iris,
  color = Species,
  size = Petal.Width
)
```

```{r 4-67}
# Line version of the plot: one coloured line per species.
qplot(
  x = Sepal.Length,
  y = Petal.Length,
  data = iris,
  geom = "line",
  color = Species
)
```

```{r 4-68}
# Circumference over age from the Orange data, one coloured line per tree.
qplot(
  x = age,
  y = circumference,
  data = Orange,
  geom = "line",
  colour = Tree,
  main = "How does orange tree circumference vary with age?"
)
```

```{r 4-69}
# Print the Titanic table and its structure, then draw a mosaic plot.
Titanic
str(Titanic)
# `color` is spelled out in full: the earlier `col = TRUE` only reached this
# argument through partial argument matching.
mosaicplot(
  Titanic,
  main = "Titanic Data, Class,Sex,Age,Survival",
  color = TRUE
)
```